In [1]:
#OS level tools
import os
import time
import datetime
import itertools
from collections import defaultdict
from glob import glob
import psutil
from functools import partial
from natsort import natsorted
import re

#array and data structure
import numpy as np
import pandas as pd
import seaborn as sb

#IPython display and widgets
#import ipywidgets as widgets
from IPython.display import Image, HTML, display
from IPython.display import Markdown as md
#from ipywidgets import interact_manual

#holoviews and plotting
import holoviews as hv
import datashader as ds
from holoviews.operation.datashader import aggregate, shade, datashade
from bokeh.models import HoverTool
#from holoviews.operation import decimate

#dask parallelization
import dask.dataframe as dd
from dask import compute, delayed
import dask.threaded
import dask.multiprocessing

from multiprocessing import Pool, cpu_count
# CPU count reported by multiprocessing; used further down to cap the size of
# the export worker pools.
numOfCores=cpu_count()

#tsne
from MulticoreTSNE import MulticoreTSNE as TSNE
# NOTE(review): n_jobs is hard-coded to 24 instead of being derived from
# numOfCores, and `tsne` is only referenced by commented-out cells later in
# this notebook -- confirm whether it is still needed.
tsne = TSNE(n_jobs=24)

#color assignment
# Two-colour ramps passed as `cmap` to datashade: one for all events, one for
# the events of the parent population.
cmap_all = ['white', 'white']
cmap_parent = ['black', 'grey']
# Five per-population ramps, repeated 2**5 = 32 times (160 entries) so that a
# ramp can be looked up by gate index without wrapping around manually.
cmap_pop = (
    ['darkgreen', 'lightgreen'],
    ['darkorange', 'yellow'],
    ['purple', 'blueviolet'],
    ['darkblue', 'lightblue'],
    ['indianred', 'red'],
) * (2 ** 5)
background = '#D3D3D3'

#export path assignment
#scratch_path='/scratch/'+os.environ['USER']+'/'+os.environ['SLURM_JOBID']
# Rendered PNG files are written to a "PNG" directory below scratch_path.
scratch_path="."
export_path=scratch_path+"/PNG"
png_path="PNG"
try:
    os.makedirs(export_path)
except OSError:
    # An already-existing directory is fine; anything else (permissions, a
    # regular file in the way, ...) is a real error. Checking the filesystem
    # avoids `os.errno`, which is not a public attribute of `os` and is
    # missing on Python 3.7+, where it would turn a re-run into AttributeError.
    if not os.path.isdir(export_path):
        raise
        
#export = partial(export_image, export_path=export_path, background=background)
WARNING: param.Version now supports PEP440 and a new tag based workflow. See param/version.py for more details
In [2]:
# Load the HoloViews notebook extension with the bokeh backend.
hv.notebook_extension('bokeh')
# Stretch the notebook container to full width and keep wide content on one
# horizontally scrollable line.
display(HTML("<style>.container { width:100% !important; overflow-x: auto;white-space: nowrap;}</style>"))
# Default plot options for RGB elements: no toolbar, 400x400, grey background.
hv.opts("RGB [toolbar=None, width=400, height=400, bgcolor='#D3D3D3', fontsize={'title':15, 'xlabel':10, 'ylabel':10, 'ticks':5}]")
In [3]:
#%load_ext memory_profiler
In [4]:
def config_objects(s):
    """Parse a tab-separated gate configuration file into a dict of gates.

    Every non-blank line must have at least 11 tab-separated fields whose
    first 11 values are integers; a 12th field, when the line has exactly 12
    fields, is kept as the phenotype label.

    Parameters
    ----------
    s : str
        Path of the configuration file.

    Returns
    -------
    dict
        Maps ``"pop" + <first field>`` to a list holding the 11 integer
        fields followed by the phenotype string (``""`` if absent).

    Raises
    ------
    Exception
        With the message ``"Error parsing configuration file"`` when the file
        cannot be read or a line is malformed.
    """
    gates = {}
    try:
        with open(s) as config_file:
            for line in config_file:
                line = line.strip()
                if not line:
                    # Tolerate blank lines, e.g. a trailing newline at end of file.
                    continue
                gate = line.split("\t")
                if len(gate) < 11:
                    raise ValueError("expected at least 11 fields, got %d" % len(gate))
                phenoType = gate[11] if len(gate) == 12 else ""
                gates["pop" + str(gate[0])] = [int(field) for field in gate[:11]] + [phenoType]
    except Exception:
        # `except Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate; the message is unchanged for existing callers.
        raise Exception("Error parsing configuration file")
    return gates
        
def config_summary(s, h):
    """Parse the gate configuration file into a human-readable summary.

    Parameters
    ----------
    s : str
        Path of the tab-separated configuration file.
    h : sequence
        Column headers; fields 2 and 3 of each line are 1-based indices into it.

    Returns
    -------
    dict
        Maps ``"pop" + <first field>`` to
        ``[id, parent, xmarker, ymarker, phenotype, startx, endx, starty, endy]``
        where ``parent`` is ``"pop" + <field 8>``, the phenotype is the 12th
        field (``""`` unless the line has exactly 12 fields) and the four
        coordinates are fields 4-7 rescaled by 4096/200 and truncated to int.

    Raises
    ------
    Exception
        With the message ``"Error parsing configuration file"`` when the file
        cannot be read or a line is malformed.
    """
    def rescale(value):
        # Same conversion for all four gate edges; the plotting cells use a
        # 0-4096 axis range.
        return int((float(value) / 200) * 4096)

    gates = {}
    try:
        with open(s) as config_file:
            for line in config_file:
                line = line.strip()
                if not line:
                    # Tolerate blank lines, e.g. a trailing newline at end of file.
                    continue
                gate = line.split("\t")
                xmarker = str(h[int(gate[1]) - 1])
                ymarker = str(h[int(gate[2]) - 1])
                startx, endx = rescale(gate[3]), rescale(gate[4])
                starty, endy = rescale(gate[5]), rescale(gate[6])
                parent = "pop" + gate[7]
                phenoType = gate[11] if len(gate) == 12 else ""
                gates["pop" + str(gate[0])] = [int(gate[0]), parent, xmarker, ymarker,
                                               phenoType, startx, endx, starty, endy]
    except Exception:
        # `except Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate; the message is unchanged for existing callers.
        raise Exception("Error parsing configuration file")
    return gates
        
_nsre = re.compile('([0-9]+)')
def natural_sort_key(s):
    """Build a sort key that orders embedded numbers numerically.

    ``s`` is split into alternating non-digit and digit runs; digit runs
    become ints and everything else is lower-cased.
    """
    key = []
    for chunk in _nsre.split(s):
        key.append(int(chunk) if chunk.isdigit() else chunk.lower())
    return key

def natural_sort(l):
    """Return a new list with the strings of ``l`` in case-insensitive natural order.

    Digit runs are compared as integers, so ``"a2"`` sorts before ``"a10"``.
    """
    #https://stackoverflow.com/a/4836734/846892
    def alphanum_key(item):
        pieces = re.split('([0-9]+)', item)
        return [int(piece) if piece.isdigit() else piece.lower() for piece in pieces]

    ordered = list(l)
    ordered.sort(key=alphanum_key)
    return ordered

def label_color (pops, row):
    """Return the 1-based position of the last entry of ``pops`` whose value
    in ``row`` equals 0, or 0 when no entry does."""
    hits = [position for position, pop in enumerate(pops, 1) if row[pop] == 0]
    return hits[-1] if hits else 0

def label_color2 (pops, row):
    """Return the last entry of ``pops`` whose value in ``row`` equals 0, or
    the string ``"base"`` when no entry does."""
    hits = [pop for pop in pops if row[pop] == 0]
    return hits[-1] if hits else "base"

def parseCohort(s):
    """Placeholder for cohort parsing.

    Currently only verifies that ``s`` can be opened for reading and returns
    ``None``; an unreadable path raises the error from ``open``.
    """
    # Open through a context manager so the handle is released immediately
    # instead of being leaked until garbage collection.
    with open(s):
        pass
    return

def parseDataFrame(s):
    """Load a tab-separated result file and describe its columns.

    Parameters
    ----------
    s : str
        Path of the tab-separated file; its first line is the header row.

    Returns
    -------
    list
        ``[sampleLabel, headers, markers, popList, df]`` where
        ``sampleLabel`` is ``s`` without its extension, ``headers`` are the
        header fields as written in the file, ``markers`` are the headers
        before the ``"pop1"`` column (all headers if there is none),
        ``popList`` are the headers containing ``"pop"``, and ``df`` is the
        file read by pandas with an extra all-zero ``pop0`` column.
    """
    sampleLabel = os.path.splitext(s)[0]

    # Only the header line is needed here; the context manager closes the
    # handle, and the data itself is read by pandas below.
    with open(s) as result_file:
        headers = result_file.readline().strip().split("\t")

    popList = [name for name in headers if "pop" in name]
    pop_offset = len(headers)
    for position, name in enumerate(headers):
        if name == "pop1":
            pop_offset = position
    markers = headers[0:pop_offset]

    df = pd.read_csv(s, sep='\t')
    df['pop0'] = 0
    return [sampleLabel, headers, markers, popList, df]

def parseDAFi(s):
    """Read the tab-separated file ``s`` into a DataFrame and add an all-zero
    ``pop0`` column."""
    gated_events = pd.read_csv(s, sep='\t')
    gated_events['pop0'] = 0
    return gated_events

def html_row(file):
    """Return an inline ``<img>`` tag for ``<export_path>/<file>.png`` whose
    tooltip is ``file``."""
    image_url = export_path + "/" + file + ".png"
    template = '<img src="{}" style="display:inline;margin:1px" title="{}"/>'
    return template.format(image_url, file)

def sanitize_name(name):
    """Turn a sample or population label into an identifier-like token.

    The substitutions are applied in order: spaces and ``".fcs"`` are
    removed, ``"."`` and ``"("`` become ``"_"``, ``"-"`` becomes ``"n"``,
    ``"+"`` becomes ``"p"`` and ``")"`` is removed.
    """
    # Order matters: ".fcs" has to go before the remaining dots become "_".
    substitutions = (
        (" ", ""),
        (".fcs", ""),
        (".", "_"),
        ("-", "n"),
        ("+", "p"),
        ("(", "_"),
        (")", ""),
    )
    newName = name
    for old, new in substitutions:
        newName = newName.replace(old, new)
    return newName

#def html_row(file):
#     return '<img src="{}?{}" style="display:inline;margin:1px" title="{}"/>'.format(export_path+"/"+file+".png",time.time(),file,file)
In [5]:
def colNormalize(df):
    """Return a copy of ``df`` with every column expressed as a percentage of
    that column's maximum, rounded to two decimals. ``df`` is not modified."""
    floor = 0
    normalized = df.copy()
    for column in df.columns:
        peak = df[column].max()
        fraction = (df[column] - floor) / (peak - floor)
        normalized[column] = fraction * 100
    return normalized.round(2)
In [6]:
display(md("# FlowGate Data Analysis Report"))
# Stamp the report with the generation time at minute resolution.
display(md("## Generated: {}".format(datetime.datetime.now().strftime("%Y-%m-%d %H:%M"))))

FlowGate Data Analysis Report

Generated: 2024-06-22 23:55

In [7]:
# Optional report header: the first three lines of description.txt are read
# as title, description and dataset name (in that order). Nothing is shown
# when the file is missing or empty.
titlefilename=glob('description.txt')
if titlefilename and os.stat(titlefilename[0]).st_size != 0:
    # `with` closes the file even if one of the reads fails.
    with open(titlefilename[0]) as titlefile:
        title=titlefile.readline()
        desc=titlefile.readline()
        dataset=titlefile.readline()
    display(md("# Analysis Title: %s"%(title)))
    display(md("## Dataset: %s"%(dataset)))
    display(md("### Description: %s"%(desc)))
#     else:
#         display(md("# Dataset: no description given"))
# else:
#     display(md("# Dataset: no description given"))
In [8]:
%%output backend='bokeh'
%%opts Table [width=1200]
metadatafilename=glob('metadata.txt')
if metadatafilename:
    if (os.stat(metadatafilename[0]).st_size != 0):
        metadatafile=open(metadatafilename[0])
        try:
            metaheader = metadatafile.readline()
            metaheader = metaheader.strip()
            metaheaders = metaheader.split("\t")
            metaDf=pd.read_csv('metadata.txt', sep='\t')
            metaTable=hv.Table(metaDf)
            display(md("## Metadata"))
            display(metaTable)
        except pd.errors.EmptyDataError:
            display(md("## No Metadata"))
            #display(md("## error parsing metadata"))
#     else:
#         display(md("## No Metadata"))
#         #display(md("## metadata file empty!"))
# else:
#     display(md("## No Metadata"))
#     #display(md("## No metadata info given"))
In [9]:
# One flock*.txt result file per sample directory below Gated/.
gatedFiles=sorted(glob('Gated/*/flock*.txt'))
# [directory name, lazily parsed DataFrame] pairs, in gatedFiles order.
gatedDelayed=[[(os.path.split(os.path.dirname(fn))[1]),delayed(parseDAFi)(fn)] for fn in gatedFiles]
# Sanitized directory names, index-aligned with gatedDelayed / dfArray.
sample_labels=[sanitize_name(os.path.split(os.path.dirname(fn))[1]) for fn in gatedFiles]
# Evaluate all delayed parses with the threaded scheduler.
# NOTE(review): the `get=` keyword belongs to older dask releases; newer ones
# expect `scheduler='threads'` -- confirm against the installed dask version.
dfArray=compute(*gatedDelayed, get=dask.threaded.get)
In [10]:
# Column names of the first sample's DataFrame.
headers=list(dfArray[0][1])
# Every column whose name contains "pop" is a population flag column.
popList=[name for name in headers if "pop" in name]
# Marker columns are those before "pop1" (all columns if it is absent).
pop1_positions=[position for position, name in enumerate(headers) if name == "pop1"]
pop_offset=pop1_positions[-1] if pop1_positions else len(headers)
markers = headers[0:pop_offset]

Configuration

In [11]:
%%output backend='bokeh'
%%opts Table.gates [width=1200]
%%opts Table.summary (row_headers=False) [width=1200]
configLabel="pipeline.config"
gates=config_objects(configLabel)
num_gates = len(gates)
summary=config_summary(configLabel, headers)
num_gates = len(summary)

gatesummary = [v for v in summary.values()]
di = {summary.get(element)[0]:str(summary.get(element)[0]).zfill(2)+"_"+summary.get(element)[4] for i,element in enumerate(summary)}
summaryTable=hv.Table(gatesummary,kdims=['Population','Parent','XMarker','YMarker','phenotype','startx', 'endx', 'starty', 'endy'], group='summary', label='Summary')

sortedTable=summaryTable.sort('Population')
sortedTable
Out[11]:
In [12]:
%%output backend='bokeh'
# Derive per-gate plotting information from the parsed configuration:
#   popBounds         pop name -> [xmarker, ymarker, startx, starty, endx, endy, ctype, parent pop name, phenotype]
#   axises            one [xmarker, ymarker, axis key, parent pop name] entry per distinct consecutive axis
#   axis_popIndexDict axis key -> pop names drawn on that axis
#   gatesconfig       [pop, xmarker, ymarker, parent id, ctype, phenotype] per gate, in gate order
axis_popIndexDict = defaultdict(list)
popBounds={}
axises=[]
composite_axis=0
last_xmarker=""
last_ymarker=""
last_parent=0
gatesconfig=[]
# Gates are looked up as "pop1".."pop<len(gates)>", i.e. this assumes the ids
# in the configuration are the consecutive integers starting at 1.
for i in range(len(gates)):
    pop="pop"+str(i+1)
    config=gates.get(pop)
    # Fields 1 and 2 are 1-based column indices into headers.
    xmarker=str(headers[config[1]-1])
    ymarker=str(headers[config[2]-1])
    # Fields 3-6 are rescaled by 4096/200 and truncated to int.
    startx=int((float(config[3])/200)*4096)
    starty=int((float(config[5])/200)*4096)
    endx=int((float(config[4])/200)*4096)
    endy=int((float(config[6])/200)*4096)
    parent=int(config[7])
    ctype=int(config[8])
    phenotype=config[11]
    # Fall back to the pop name when no phenotype label was configured.
    if phenotype=="":phenotype=pop
    popBounds.update({pop:[xmarker, ymarker, startx,starty,endx,endy,ctype,"pop"+str(parent),phenotype]})
    key="axis"+str(composite_axis).zfill(2) 
    # Start a new composite axis whenever the markers or the parent differ
    # from the previous gate; otherwise the gate joins the current axis.
    if (xmarker != last_xmarker) or (ymarker != last_ymarker) or (parent != last_parent):
        composite_axis=composite_axis+1
        key="axis"+str(composite_axis).zfill(2)
        axises.append([xmarker,ymarker,key,"pop"+str(parent)])
    axis_popIndexDict[key].append(pop)
    last_xmarker=xmarker
    last_ymarker=ymarker
    last_parent=parent
    gatesconfig.append([pop,xmarker,ymarker,parent,ctype,phenotype])

# num_axises = len(axises)
# markerTable=hv.Table(markers,kdims=['Markers'])
# axis_popTable=hv.Table(axis_popIndexDict, kdims=['Axis Index'], vdims=['sub populations'])
# markerTable+axis_popTable.sort('Axis Index')

Composite 2D Dot-Plots Gated Populations

In [13]:
# Load the matplotlib backend; the PNG export functions below render with
# backend='matplotlib'.
hv.notebook_extension('matplotlib')
In [14]:
# Plot options for RGB elements in the exported dot plots: 600x600, grey background.
hv.opts("RGB [width=600, height=600, bgcolor='#D3D3D3', fontsize={'title':8, 'xlabel':15, 'ylabel':15, 'ticks':10}]")
In [15]:
# Style for Points elements in group "cent" (only referenced by the
# commented-out centroid plots in this notebook).
hv.opts("Points.cent (color='purple' marker='+' size=10)")
In [16]:
# Display labels ("<zero-padded id>_<phenotype>") in natural order.
# NOTE(review): later cells index poplist[j] with the position j of the gate
# in gatesconfig, so this relies on natural label order matching gate order.
poplist=natural_sort(di.values())
In [17]:
# Entry for the ungated root, so that gates whose parent is "pop0" can look up
# a parent phenotype ('Whole') when the captions are built.
popBounds["pop0"]=['Whole',"Whole", 0, 0, 0, 0, 0, 'N/A', 'Whole']
In [18]:
# Build one HoloMap per plot layer, each keyed by (sample label, population
# label). k indexes the sample in dfArray, j the gate in gatesconfig; gate is
# [pop, xmarker, ymarker, parent id, ctype, phenotype].
size=600
# Rows whose own pop column equals 0, shaded with the gate's colour ramp
# (presumably 0 marks membership of the population -- confirm).
popdfPlots = hv.HoloMap({(sample, poplist[j]): datashade(hv.Points(dfArray[k][1].loc[dfArray[k][1][gate[0]]==0], kdims=[gate[1], gate[2]]), width=size, height=size, x_range=(0,4096), y_range=(0,4096), dynamic=False, link_inputs=False, cmap=cmap_pop[j])
                    for k, sample in enumerate(sample_labels) for j, gate in enumerate(gatesconfig)}, kdims=['Sample', 'Pop'])
# All rows of the sample on the gate's two markers, shaded white.
alldfPlots = hv.HoloMap({(sample, poplist[j]): datashade(hv.Points(dfArray[k][1], kdims=[gate[1], gate[2]]), width=size, height=size, x_range=(0,4096), y_range=(0,4096), dynamic=False, link_inputs=False, cmap=cmap_all)
                    for k, sample in enumerate(sample_labels) for j, gate in enumerate(gatesconfig)}, kdims=['Sample', 'Pop'])
# Rows whose parent pop column equals 0 while their own pop column equals 1.
parentdfPlots = hv.HoloMap({(sample, poplist[j]): datashade(hv.Points(dfArray[k][1].loc[(dfArray[k][1]["pop"+str(gate[3])]==0) & (dfArray[k][1][gate[0]]==1)], kdims=[gate[1], gate[2]]), width=size, height=size, x_range=(0,4096), y_range=(0,4096), dynamic=False, link_inputs=False, cmap=cmap_parent)
                    for k, sample in enumerate(sample_labels) for j, gate in enumerate(gatesconfig)}, kdims=['Sample', 'Pop'])
# Gate rectangle (startx, starty, endx, endy) when ctype (gate[4]) is 0,
# otherwise a zero-size placeholder.
boundarydfPlots=hv.HoloMap({(sample, poplist[j]): (hv.Bounds((popBounds.get(gate[0])[2], popBounds.get(gate[0])[3], popBounds.get(gate[0])[4], popBounds.get(gate[0])[5])).opts(style=dict(line_color=cmap_pop[j][0],color=cmap_pop[j][0])) if gate[4]==0 else hv.Bounds((0,0,0,0)))
                    for k, sample in enumerate(sample_labels) for j, gate in enumerate(gatesconfig)}, kdims=['Sample', 'Pop'])
# Two text labels: the parent's phenotype at (4096, 0) and, at the gate's
# (endx, endy), the gate's phenotype with the number of rows whose pop column is 0.
captions=hv.HoloMap({(sample, poplist[j]): (hv.Text(4096, 0, "Parent:"+str(popBounds.get(popBounds.get(gate[0])[7])[8])).opts(style=dict(color="black"))*hv.Text(popBounds.get(gate[0])[4], popBounds.get(gate[0])[5], gate[5]+"("+str(len(dfArray[k][1].loc[dfArray[k][1][gate[0]]==0]))+")").opts(style=dict(line_color=cmap_pop[j][0],color=cmap_pop[j][0])))
                    for k, sample in enumerate(sample_labels) for j, gate in enumerate(gatesconfig)}, kdims=['Sample', 'Pop'])# centroiddfPlots = hv.HoloMap({(sample, j+1): hv.Points(centDict.get(sample+"_"+reclustermap.get(gate[0])), kdims=[gate[1], gate[2]], group="cent")
#                     for k, sample in enumerate(sample_labels) for j, gate in enumerate(gatesconfig)}, kdims=['Sample', 'Pop']) centroiddfPlots = hv.HoloMap({(sample, j+1): hv.Points(centDict.get(sample+"_"+reclustermap.get(gate[0])), kdims=[gate[1], gate[2]], group="cent")
#                     for k, sample in enumerate(sample_labels) for j, gate in enumerate(gatesconfig)}, kdims=['Sample', 'Pop'])
In [19]:
def outputSampleGates(sample):
    """Export all gate plots of ``sample`` as one single-column PNG layout.

    Returns the file name stem (``<export_path>/<sample>``) handed to
    ``hv.output``.
    """
    filename = export_path + "/" + sample
    sample_layout = hv.NdLayout(combineddfPlots[sample, :]).cols(1)
    hv.output(sample_layout, backend='matplotlib', size=200, fig='png', filename=filename)
    return filename

def outputSampleGates2(sample, cols, size):
    """Export all gate plots of ``sample`` as a PNG layout with ``cols`` columns.

    ``size`` is forwarded to ``hv.output``. Returns the file name stem
    (``<export_path>/h_<sanitized sample>``) handed to ``hv.output``.
    """
    filename = export_path + "/h_" + sanitize_name(sample)
    sample_layout = hv.NdLayout(combineddfPlots[sample, :]).cols(cols)
    hv.output(sample_layout, backend='matplotlib', size=size, fig='png', filename=filename)
    return filename

def outputPopGates(pop):
    """Export the plots of population ``pop`` across all samples as one PNG.

    The layout uses six columns. Returns the file name stem
    (``<export_path>/<sanitized pop>``) handed to ``hv.output``.
    """
    filename = export_path + "/" + sanitize_name(pop)
    # Slice the HoloMap once; the former unused `length` local sliced it a
    # second time for nothing.
    pop_layout = hv.NdLayout(combineddfPlots[:, pop]).cols(6)
    hv.output(pop_layout, backend='matplotlib', size=200, fig='png', filename=filename)
    return filename

def outputPopBokehGates(pop):
    """Return an NdLayout with the plots of population ``pop`` for all
    samples, laid out in a single row (one column per plot)."""
    # The former `filename` local was never used and has been dropped.
    pop_plots = combineddfPlots[:, pop]
    return hv.NdLayout(pop_plots).cols(len(pop_plots))
In [20]:
#%%time
# Overlay the layers in drawing order: all events, parent events, population
# events, gate boundary, captions.
combineddfPlots=alldfPlots*parentdfPlots*popdfPlots*boundarydfPlots*captions
In [21]:
#%%time
# Render one cross-sample PNG per population in parallel worker processes.
scomp_pool = Pool(processes=min(8,numOfCores),maxtasksperchild=2)
pop_jobs = [(pop, scomp_pool.apply_async(outputPopGates, args=[pop])) for pop in poplist]
scomp_pool.close()
scomp_pool.join()
# apply_async discards a worker's exception unless its result is fetched.
# Report failures instead of losing them, so that a missing image in the
# report can be traced back to its cause.
for pop, job in pop_jobs:
    try:
        job.get()
    except Exception as err:
        print("WARNING: PNG export failed for population %s: %r" % (pop, err))
In [22]:
#%%time
# Render one sequential-gating PNG per sample in parallel worker processes.
scomp_pool = Pool(processes=min(8,numOfCores),maxtasksperchild=2)
sample_jobs = [(sample, scomp_pool.apply_async(outputSampleGates2, args=[sample,len(gates),200])) for sample in sample_labels]
scomp_pool.close()
scomp_pool.join()
# apply_async discards a worker's exception unless its result is fetched.
# Report failures instead of losing them, so that a missing image in the
# report can be traced back to its cause.
for sample, job in sample_jobs:
    try:
        job.get()
    except Exception as err:
        print("WARNING: PNG export failed for sample %s: %r" % (sample, err))
In [23]:
def composite_pop(pop):
    """Return the ``<script>`` that defines the show/hide toggle for a population.

    With ``tag = "func_" + sanitize_name(pop)`` the script declares
    ``<tag>_show`` and ``<tag>_toggle()``, which hides or shows the elements
    of class ``<tag>`` and recolours ``#<tag>_button``; the toggle is also
    registered to run once on document ready.
    """
    tag = "func_" + sanitize_name(pop)
    # One list item per line of the emitted script; {0} is the tag.
    template = "\n".join([
        "<script>{0}_show=true; ",
        "    function {0}_toggle() {{",
        "    if ({0}_show){{",
        "    $('.{0}').hide();",
        "    $('#'+'{0}_button').css('color','grey');",
        "    }} else {{",
        "    $('.{0}').show();",
        "    $('#'+'{0}_button').css('color','black')",
        "    }}",
        "    {0}_show = !{0}_show",
        "    }} ",
        "    $( document ).ready({0}_toggle);",
        "    </script>",
    ])
    return template.format(tag)

def composite_sample(sample):
    """Return the ``<script>`` that defines the show/hide toggle for a sample.

    With ``tag = "func_" + sanitize_name(sample)`` the script declares
    ``<tag>_show`` and ``<tag>_toggle()``, which hides or shows the elements
    of class ``<tag>`` and recolours ``#<tag>_button``; the toggle is also
    registered to run once on document ready.
    """
    tag = "func_" + sanitize_name(sample)
    # One list item per line of the emitted script; {0} is the tag.
    template = "\n".join([
        "<script>{0}_show=true; ",
        "    function {0}_toggle() {{",
        "    if ({0}_show){{",
        "    $('.{0}').hide();",
        "    $('#'+'{0}_button').css('color','grey');",
        "    }} else {{",
        "    $('.{0}').show();",
        "    $('#'+'{0}_button').css('color','black')",
        "    }}",
        "    {0}_show = !{0}_show",
        "    }} ",
        "    $( document ).ready({0}_toggle);",
        "    </script>",
    ])
    return template.format(tag)

def button_hide(button):
    """Return a ``<script>`` that hides the elements of one toggle group.

    With ``tag = "func_" + sanitize_name(button)`` the script hides the
    elements of class ``<tag>``, greys out ``#<tag>_button`` and flips
    ``<tag>_show``.
    """
    button=sanitize_name(button)
    button="func_"+button

    # Explicit {0} index: the template mentions the tag four times, while the
    # previous auto-numbered form was given only three arguments and therefore
    # raised IndexError on every call.
    script='''<script>
    $('.{0}').hide();
    $('#'+'{0}_button').css('color','grey');
    {0}_show = !{0}_show
    </script>'''.format(button)
    return script

def composite_gates():
    """Return the ``<script>`` that defines ``gates_show`` / ``gates_toggle()``.

    The toggle hides or shows the elements of class ``gates`` and is also
    registered to run once on document ready.
    """
    tag = "gates"
    # One list item per line of the emitted script; {0} is the tag.
    template = "\n".join([
        "<script>{0}_show=false; ",
        "    function {0}_toggle() {{",
        "    if ({0}_show){{",
        "    $('.{0}').hide();",
        "    }} else {{",
        "    $('.{0}').show();",
        "    }}",
        "    {0}_show = !{0}_show",
        "    }} ",
        "    $( document ).ready({0}_toggle);",
        "    </script>",
    ])
    return template.format(tag)

def display_sample(sample):
    """Return a ``<div class="func_<name>">`` wrapping the ``h_<name>`` image
    of a sample, where ``<name>`` is the sanitized sample label."""
    name = sanitize_name(sample)
    return '<div class="func_{}" >{}</div>'.format(name, html_row("h_" + name))

def display_pop(pop):
    """Return a ``<div class="func_<name>">`` wrapping the image of a
    population, where ``<name>`` is the sanitized population label."""
    name = sanitize_name(pop)
    return '<div class="func_{}" >{}</div>'.format(name, html_row(name))

def buttons_pop(pop):
    """Return the submit button that calls ``func_<name>_toggle()`` for a
    population; ``<name>`` is the sanitized label and also the button text."""
    name = sanitize_name(pop)
    toggle = "func_" + name
    template = '<input type="submit" id="{button_id}" formaction="javascript:{toggle}_toggle()" value="{caption}">'
    return template.format(button_id=toggle + "_button", toggle=toggle, caption=name)

def buttons_sample(sample):
    """Return the submit button that calls ``func_<name>_toggle()`` for a
    sample; ``<name>`` is the sanitized label and also the button text."""
    name = sanitize_name(sample)
    toggle = "func_" + name
    template = '<input type="submit" id="{button_id}" formaction="javascript:{toggle}_toggle()" value="{caption}">'
    return template.format(button_id=toggle + "_button", toggle=toggle, caption=name)

def button_gates():
    """Return the submit button that calls ``gates_toggle()``."""
    tag = "gates"
    return '<input type="submit" formaction="javascript:{0}_toggle()" value="{0}">'.format(tag)
In [24]:
display(md("## Cross-Sample Dot Plots of Cell Populations"))
display(md("### Click population ID to toggle show/hide"))
# One <form> per row of up to 10 buttons. Every form is opened and closed
# here; the previous string concatenation produced an empty leading
# <form></form> and left the last form unclosed.
buttonsHtml=""
for row_start in range(0, len(poplist), 10):
    row_buttons="".join(buttons_pop(pop) for pop in poplist[row_start:row_start+10])
    buttonsHtml=buttonsHtml+"<form>"+row_buttons+"</form>"
#buttonsHtml=buttonsHtml+"</form>"+"<form>"+button_gates()+"</form>"
HTML(buttonsHtml)

Cross-Sample Dot Plots of Cell Populations

Click population ID to toggle show/hide

Out[24]:
In [25]:
# Zoom controls for the "pops" container. The counter is named popzoomin so
# that it does not share a global JavaScript variable with the zoom controls
# of the "gates" container.
HTML('''<script>
popzoomin=100
function popzoom_in() {
 popzoomin = popzoomin+100
 $('div.pops').css('width',popzoomin+'%');
 $('p.pops').text('Canvas Size: '+popzoomin+'%')
} 
$( document ).ready(popzoom_in);
function popzoom_out() {
 popzoomin = popzoomin-100
 if (popzoomin<100) {
     popzoomin=100
 }
 $('div.pops').css('width',popzoomin+'%');
 $('p.pops').text('Canvas Size: '+popzoomin+'%')
} 
$( document ).ready(popzoom_out);
</script>
<form style="display:inline;margin:1px" action="javascript:popzoom_in()"><input type="submit" value="Zoom in"></form><form style="display:inline;margin:1px" action="javascript:popzoom_out()"><input type="submit" value="Zoom out"></form><p class="pops">Canvas Size: 100%</p>''')
Out[25]:

Canvas Size: 100%

In [26]:
# Concatenate the per-population image blocks and wrap them in the zoomable
# "pops" container.
pophtml="".join(map(display_pop, poplist))
display(HTML('<div class="pops">{}</div>'.format(pophtml)))
In [27]:
display(md("## Step by Step Sequential Gating Dot Plots"))
display(md("### Click sample ID/name to toggle show/hide"))
# One <form> per row of up to 10 buttons. Every form is opened and closed
# here; the previous string concatenation produced an empty leading
# <form></form> and left the last form unclosed.
sampleButtonsHtml=""
for row_start in range(0, len(sample_labels), 10):
    row_buttons="".join(buttons_sample(sample) for sample in sample_labels[row_start:row_start+10])
    sampleButtonsHtml=sampleButtonsHtml+"<form>"+row_buttons+"</form>"
#buttonsHtml=buttonsHtml+"</form>"+"<form>"+button_gates()+"</form>"
HTML(sampleButtonsHtml)

Step by Step Sequential Gating Dot Plots

Click sample ID/name to toggle show/hide

Out[27]:
In [28]:
# Concatenate the per-sample image blocks in sample_labels order.
samplehtml="".join(map(display_sample, sample_labels))
In [29]:
# Zoom controls for the "gates" container. The counter is named gateszoomin so
# that it does not share a global JavaScript variable with the zoom controls
# of the "pops" container.
HTML('''<script>
gateszoomin=100
function gateszoom_in() {
 gateszoomin = gateszoomin+100
 $('div.gates').css('width',gateszoomin+'%');
 $('p.gates').text('Canvas Size: '+gateszoomin+'%')
} 
$( document ).ready(gateszoom_in);
function gateszoom_out() {
 gateszoomin = gateszoomin-100
 if (gateszoomin<100){
     gateszoomin = 100
 }
 $('div.gates').css('width',gateszoomin+'%');
 $('p.gates').text('Canvas Size: '+gateszoomin+'%')
 
} 
$( document ).ready(gateszoom_out);
</script>
<form style="display:inline" action="javascript:gateszoom_in()">
<input type="submit" value="Zoom in"></form>
<form style="display:inline" action="javascript:gateszoom_out()">
<input type="submit" value="Zoom out"></form>
<p class="gates">Canvas Size: 100%</p>''')

# HTML('''<script>
# zoomin=100
# function zoom_in() {
#  $('div.gates').css('width',zoomin+'%');
#  $('p.gates').text(zoomin+'%')
#  zoomin = zoomin+100
#  if (zoomin>800) {zoomin = 100}
# } 
# $( document ).ready(zoom_in);
# </script>
# <form action="javascript:zoom_in()"><input type="submit" value="Zoom cycle"></form><p class="gates">100%</p>''')
Out[29]:

Canvas Size: 100%

In [30]:
# Wrap the per-sample image blocks in the "gates" container that the zoom
# controls resize.
display(HTML('<div class="gates">'+samplehtml+'</div>'))
In [31]:
# display(md("## Complete Gating Dot Plots"))
# display(md("### Click gates button to toggle show/hide"))
# buttonsHtml="<form>"+button_gates()+"</form>"
# HTML(buttonsHtml)
In [32]:
# #%%time
# scomp_pool = Pool(processes=min(8,numOfCores),maxtasksperchild=2)
# for sample in sample_labels:
#     scomp_pool.apply_async(outputSampleGates, args=[sample])
#     #scomp_pool.apply_async(outputSampleGates2, args=[sample])
# scomp_pool.close()
# scomp_pool.join()
In [33]:
# gateshtml="".join(html_row(sample) for sample in natsorted(sample_labels))
# gateshtml="<div class=\"gates\">"+gateshtml+"</div>"
# display(HTML(gateshtml))
In [34]:
#HTML("".join(composite_pop(pop) for pop in natsorted(poplist))+"".join(composite_sample(sample) for i, sample in enumerate(sample_labels))+composite_gates())
# Emit the toggle scripts for every population button, then every sample button.
HTML("".join(itertools.chain(map(composite_pop, natsorted(poplist)), map(composite_sample, sample_labels))))
Out[34]:

Batch Population Statistics

Population Percentage and Events Tables

In [35]:
# Population percentages, indexed by the first column of the file.
batchpercent_df = pd.read_csv('Gated/Batch_percentages.txt', sep='\t', index_col=0)
# Relabel index values through di (population id -> "<id>_<phenotype>") and
# round to two decimals.
batchpercent_df=batchpercent_df.rename(di).round(2)
In [36]:
# Event counts, indexed by the first column of the file.
batchevents_df = pd.read_csv('Gated/Batch_events.txt', sep='\t', index_col=0)
# Relabel index values through di (population id -> "<id>_<phenotype>").
batchevents_df=batchevents_df.rename(di)
In [37]:
# Column 3 of gatesconfig is each gate's parent id; building the frame with
# the percentage table's index requires both to have the same number of rows.
parentslist=pd.DataFrame(gatesconfig, index=batchpercent_df.index)[3].tolist()
percent_parent_df=batchpercent_df.copy()
# Show the parent id as the first column of the percentage table.
percent_parent_df.insert(0,"Parent",parentslist)
In [38]:
# NOTE(review): batchnorm_df is only consumed by commented-out code below --
# confirm whether it is still needed.
batchnorm_df = colNormalize(batchevents_df)
display(md("#### Population Percentage (by Parent's size) table"))
# Lift the column limit so that every sample column is rendered.
with pd.option_context('display.max_columns', None):
    display(percent_parent_df)
    
display(md("#### Events table"))
with pd.option_context('display.max_columns', None):
    display(batchevents_df)

# display(md("#### Population Percentage (by Singlets size) table"))
# with pd.option_context('display.max_columns', None):
#     display(batchnorm_df)

Population Percentage (by Parent's size) table

Parent PATIENT_13_BMA_M1.fcs PATIENT_15_BMA_M1.fcs PATIENT_23_BMA_M1.fcs PATIENT_7_BMA_M1.fcs PATIENT_9_BMA_M1.fcs
01_FSC_Singlets 0 99.29 96.77 97.54 98.77 98.90
02_SSC_Singlets 1 99.49 97.47 98.00 99.11 99.53
03_CD45vsSSC_H 2 7.83 35.05 43.15 14.36 75.55
04_CD64vsCD14 3 93.40 79.50 88.10 99.00 99.93
05_HLA_DRvsCD13 4 75.90 90.42 94.43 32.70 98.54
06_CD34vsCD11b 5 74.92 49.57 97.73 69.88 99.41
07_CD7vsCD10 6 86.99 94.08 1.71 96.85 68.61
08_FSC_AvsSSC_A 7 96.20 96.18 80.10 95.37 97.22

Events table

PATIENT_13_BMA_M1.fcs PATIENT_15_BMA_M1.fcs PATIENT_23_BMA_M1.fcs PATIENT_7_BMA_M1.fcs PATIENT_9_BMA_M1.fcs
01_FSC_Singlets 74636 68536 68404 46409 58974
02_SSC_Singlets 74254 66802 67039 45996 58697
03_CD45vsSSC_H 5816 23414 28930 6604 44348
04_CD64vsCD14 5432 18615 25488 6538 44316
05_HLA_DRvsCD13 4123 16832 24068 2138 43671
06_CD34vsCD11b 3089 8343 23522 1494 43415
07_CD7vsCD10 2687 7849 402 1447 29785
08_FSC_AvsSSC_A 2585 7549 322 1380 28957
In [39]:
# Transposed percentage table (samples as rows); in this notebook it is only
# referenced by the commented-out tSNE cells.
percentdf=batchpercent_df.transpose()
#percentdf=batchpercent_df.reindex(sorted(batchpercent_df.columns), axis=1).transpose()
#percentdf=batchnorm_df.transpose()

Combined Percent/Events Dataframe

In [40]:
%%output backend="bokeh"
%%opts Table [width=1000]

# Long-format percentages: one row per (column, index) pair of the wide table.
p_df=pd.DataFrame(batchpercent_df.unstack())
p_df.columns=['Percent']

# Long-format event counts, same layout.
e_df=pd.DataFrame(batchevents_df.unstack())
e_df.columns=['Events']

# Join both on their shared (sample, population) index and flatten it into columns.
c_df=pd.concat([p_df,e_df],axis=1, join='outer').reset_index()
c_df.columns=['Sample','Population','Percent','Events']
# NOTE(review): both source tables were already relabelled with `di` via
# rename(), so this replace looks redundant -- confirm before removing.
c_df=c_df.replace({"Population":di})

Box Plots

In [41]:
%%output backend="bokeh" size=200
%%opts BoxWhisker [xrotation=45]
# Distribution across samples of each population's percentage and event count.
percentBoxPlot=hv.BoxWhisker(c_df, kdims=['Population'],vdims='Percent').relabel('Population Percent Box Plot')
eventsBoxPlot=hv.BoxWhisker(c_df, kdims=['Population'],vdims='Events').relabel('Population Events Box Plot')
# Show both plots side by side.
percentBoxPlot+eventsBoxPlot
Out[41]:
In [42]:
# %%output backend="bokeh"
# %%opts Scatter [width=1200 height=600 scaling_method='width' scaling_factor=0.1 size_index=2 show_grid=True tools=['hover']] 
# %%opts Scatter (color=Cycle('Category20') alpha=0.8 line_color='k')
# %%opts NdOverlay [legend_position='bottom' show_frame=False]
# cdf_table=hv.Table(c_df,kdims=['Sample','Population'],vdims=['Percent','Events'])
# cdf_scatter = cdf_table.to.scatter('Population', ['Percent','Events'])
# cdf_plot=cdf_scatter.overlay('Sample').relabel("Cross Sample Population Percent Comparison with Events Scaling")
# (percentBoxPlot*cdf_plot).relabel("All-sample Population Percentage Boxplot with Relative Events Scaling")
# bdf_scatter.overlay('Sample')

MFI

In [43]:
# def buildMFIDataFrame(dfArray, markerList, gatesummary):
#     mainList=[]
#     columnNames=[]
#     sampleList=[]
#     for gate in gatesummary:
#         for marker in markerList:
#             columnNames.append(gate[4]+" ["+marker+"]")
#     for sample in dfArray:
#         tempList=[]
#         currentDf=sample[1]
#         sampleList.append(sample[0])
#         for gate in gatesummary:
#             for marker in markerList:
#                 tempList.append((currentDf.loc[currentDf["pop"+str(gate[0])]==0])[marker].mean())
#         mainList.append(tempList)
    
            
#     MFIdf=pd.DataFrame(mainList, columns=columnNames)
#     MFIdf.index=sampleList
#     return MFIdf.transpose()

# MFI=buildMFIDataFrame(dfArray, markers, gatesummary)
# with pd.option_context('display.max_columns', None, 'display.max_rows', None):
#     display(MFI)

Identification of Potential Outlier Samples by tSNE Visualization of Their Cell Population Percentages

In [44]:
# tsne_data_array=percentdf.values.astype(np.float64)
In [45]:
# data_tsne = tsne.fit_transform(np.copy(tsne_data_array))
# dfn=pd.DataFrame(data_tsne, columns=['tsne-x','tsne-y'], index=percentdf.index).round(4)
# results=pd.concat([percentdf,dfn],axis=1)
# results.insert(0,"file",percentdf.index)
# colnames=list(results)[0:-2]
# with pd.option_context('display.max_columns', None, 'display.max_rows', 20):
#     display(results)
In [46]:
# from bokeh.models import HoverTool
# hover = HoverTool(tooltips="""
#     <div>
#         <div>
#             <rotate style="font-size: 17px; font-weight: bold;">@file</rotate>
#             <rotate style="font-size: 15px; color: #966;">[$index]</rotate>
#         </div>
#         <div>
#             <img
#                 src="./PNG/@file.png" height="300" width="100"
#                 style="float: left; margin: 0px 0px 0px 0px;"
#                 border="2"
#             ></img>
#         </div>
         
#     </div>
#     """
# )
In [47]:
# from bokeh.models.widgets import DataTable, TableColumn, HTMLTemplateFormatter
# template="""
# <div style="background:<%= 
#     (function colorfromint(){
#         if(value >= 95){
#             return("#200000")}
#         else if(value >= 80){
#             return("#600000")}
#         else if(value >= 70){
#             return("#800000")}
#         else if(value >= 60){
#             return("#A00000")}
#         else if(value >= 50){
#             return("#FF0000")}
#         else if(value >= 40){
#             return("#CC3333")}
#         else if(value >= 30){
#             return("#FF9966")}
#         else if(value >= 15){
#             return("#FFCC99")}
#         else if(value >= 3){
#             return("#FFFFCC")}
        
#         }()) %>; 
#     color:<%= 
#     (function colorfromint(){
#         if(value >= 30){
#             return("white")}
#         else{return("black")}
#         }()) %>"> 
# <%= value %></div>
# """
# formater =  HTMLTemplateFormatter(template=template)

# def apply_format(plot, element):
#     construct=plot.handles['plot']
#     for i, column in enumerate(construct.columns[1:-2]):
#         column.formatter=formater
    
In [48]:
# %%output backend='bokeh'
# %%opts Scatter.tSNE (size=5 nonselection_color='grey' cmap='Reds') [bgcolor='#D3D3D3' color_index=2 width=500 height=500 tools=['hover','box_select','poly_select','reset']] 
# %%opts Layout [shared_datasource=True]
# %%opts Table.tSNE (row_headers=False) [width=1600 height=500]
# labels=[kd for i, kd in enumerate(colnames[4:len(colnames)])]
# holomap = hv.HoloMap({(kd): hv.Scatter(results, kdims=['tsne-x','tsne-y'],vdims=[kd, 'file'], group="tSNE") for i, kd in enumerate(colnames[4:len(colnames)])}, kdims='Population')
# tSNEtable = hv.Table(results, group="tSNE")
# display(md("####      Select one or multiple rows/samples to view them on tSNE map"))
# hv.Layout(tSNEtable.opts(plot=dict(finalize_hooks=[apply_format]))+holomap.layout().cols(3)).cols(1)
In [49]:
# Button that shows or hides the notebook's input cells ('div.input'). The
# toggle is also registered on document ready, so it runs once when the page
# has loaded.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
Out[49]: